******************************************************************************************
* Replication file for 

* Title:		The Racialized Pandemic: Wave One of Covid-19 and the Reproduction of Global North Inequalities
* Authors:	Gerda Hooijer and Desmond King
* Contact:	g.hooijer@ucl.ac.uk
* Date: 		May 16, 2021
******************************************************************************************

* This file prepares the datasets used in the paper.

* Run the commands below under the Stata 16 interpreter rules.
version 16
* Do not pause for --more-- prompts while the script is running.
set more off

// Set directory: replace the placeholder with the path of the working folder.
// Every file name used below is given relative to this folder.
cd "insert link to folder"

*** data_1.dta ***

// United States
* The three sheets of ur_us.xlsx are read from the same cell range (A13:M34,
* first row = variable names Year and Jan-Dec), so one loop handles all of them:
*   Sheet1 = unemployment rate of whites
*   Sheet2 = unemployment rate of African Americans
*   Sheet3 = unemployment rate of Latinos
local month_names Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
local group_names white black latino

forvalues s = 1/3 {
    local grp : word `s' of `group_names'
    import excel "ur_us.xlsx", sheet("Sheet`s'") cellrange(A13:M34) firstrow clear

    * Month columns get a numeric suffix (1-12) so reshape can recover the month
    forvalues m = 1/12 {
        local mon : word `m' of `month_names'
        rename `mon' us_`grp'`m'
    }

    * From one row per Year to one row per Year-month
    reshape long us_`grp', i(Year) j(month)

    * The first two groups are written to temporary files; the third one
    * (Latinos) stays in memory and serves as the master for the merges below
    if `s' < 3 {
        save temp_us`s', replace
    }
}

* Merge
* Add the white and African American series to the Latino series
forvalues s = 1/2 {
    merge 1:1 Year month using temp_us`s', nogenerate
}

* Save US file
* Only rows 73-248 survive; this relies on the row order left by the merge.
* Original note: keep observations from January 2006 onwards
drop if _n<=72 | _n>=249
gen time = _n // running index over the remaining rows
save ur_us, replace

// Sweden
import excel "ur_se.xlsx", sheet("Sheet1") firstrow clear

* Discard the first nine rows of the sheet
* (original note: keep observations from January 2006 onwards)
drop if _n<=9

* Prefix the two unemployment series with the country code
rename (total_nat total_fb) (se_total_nat se_total_fb)

* time: integer code that encode assigns to each distinct time_string value
* q: that integer code passed through dofq() and then mofd()
* NOTE(review): q is built from an encode index rather than a real quarterly
* date; confirm that this is intended before relying on q.
encode time_string, gen(time)
gen q = mofd(dofq(time))

* Save Sweden file
keep time_string se_total_nat se_total_fb time q
save ur_se, replace

// United Kingdom
import excel "ur_uk.xlsx", sheet("Sheet1") firstrow clear

* Discard the first 19 rows of the sheet
* (original note: keep observations from January 2006 onwards)
drop if _n<=19

* Recode
* Only the quarter label and the two series are needed
keep quarter White EthnicMinority2
rename (White EthnicMinority2) (uk_white uk_bame)

* q = integer code of the quarter label; the label text is kept as quarter_string
encode quarter, gen(q)
rename quarter quarter_string

* time starts at 3 on the row where q==1; every row that is still missing
* afterwards takes the previous row's value plus 3.
* Presumably the step of 3 lines quarterly rows up with a monthly time index
* in the later merge on time; confirm against the combined file.
gen time = 3 if q==1
replace time = time[_n-1]+3 if time>=.

save ur_uk, replace

// Netherlands
import excel "ur_nl.xlsx", sheet("Sheet1") firstrow clear

* Discard the first 12 rows of the sheet
* (original note: keep observations from January 2006 onwards)
drop if _n<=12

* Recode
* Replace the Dutch column names with nl_-prefixed English names
rename (Nederlandseachtergrond Metmigratieachtergrond Westersemigratieachtergrond Nietwestersemigratieachtergron) ///
       (nl_native nl_migrant nl_western_migrant nl_nonwestern_migrant)

* q = integer code of the quarter label
encode quarter_string, gen(q)

* time starts at 3 on the row where q==1; every row that is still missing
* afterwards takes the previous row's value plus 3.
gen time = 3 if q==1
replace time = time[_n-1]+3 if time>=.

save ur_nl, replace

// Combine country-specific files
* The US file is the master; Sweden, the UK and the Netherlands are attached
* one after another on the shared time variable.
use ur_us, clear
foreach cty in se uk nl {
    merge 1:1 time using ur_`cty', nogenerate
}

// Calculate gap in unemployment rates
* Each gap is the minority/migrant group's rate minus the comparison group's rate
gen us_unemp_gap = us_black - us_white
gen uk_unemp_gap = uk_bame - uk_white
gen se_unemp_gap = se_total_fb - se_total_nat
gen nl_unemp_gap = nl_nonwestern_migrant - nl_native

// Label time variable
* labmask is part of the labutil package; install it once with:
*ssc install labutil
* ym joins Year and month with the letter M in between and becomes the value label of time
egen ym = concat(Year month), p(M)
labmask time, values(ym)

// Save file
* Rows without a Year are removed before saving
keep if Year!=.
save data_1, replace

* Erase temporary files
foreach f in temp_us1 temp_us2 ur_us ur_uk ur_nl ur_se {
    erase `f'.dta
}



*** data_2.dta ***

// Occupational data for the US

// SOC2010 codes for CPS/Census data
* Build a lookup table: lower-cased occupation title -> SOC code
import excel "censusocc2010.xlsx", sheet("Sheet1") firstrow clear
rename SOCCODES soc_code
keep if soc_code!=""
gen soc_title = lower(Occupation)
keep soc_title soc_code
save us_censocc10, replace

// Merge SOC2010 codes with CPS/Census data
* Titles are lower-cased on both sides so the match ignores capitalisation;
* only occupations found in both files are retained.
import excel "occ_us.xlsx", sheet("Sheet1") firstrow clear
gen soc_title = lower(occupation)
merge 1:1 soc_title using us_censocc10, keep(match) nogenerate
save temp_us3, replace

// Crosswalk ONET to SOC2010
import excel "onet_to_soc.xls", sheet("Sheet1") firstrow clear
rename (ONETSOC2010Code ONETSOC2010Title) (onet onet_occupation)
save onet_soc10, replace

// Merge ONET data with ONET crosswalk
* Only ONET codes present in both the data and the crosswalk are retained
import excel "onet.xls", sheet("Sheet1") firstrow clear
merge 1:1 onet using onet_soc10, keep(match) nogenerate

* ONET is more detailed than SOC, so proximity is averaged within each SOC code
collapse (mean) proximity_soc = proximity, by(soc_code)
save onet, replace

// Merge CPS/Census data with ONET data
* Only SOC codes present in both files are retained
use temp_us3, clear
merge 1:1 soc_code using onet, keep(match) nogenerate

// Save US file
save data_2, replace

// Erase temporary files
foreach f in temp_us3 us_censocc10 onet_soc10 onet {
    erase `f'.dta
}



*** data_3.dta ***

// Occupational data for the UK
import excel "occ_uk.xlsx", sheet("Sheet1") firstrow clear

* Rows without an occupation code are not carried over
keep if soc_code!=.

* Save UK file
save data_3, replace



*** data_4.dta ***

// United Kingdom
* Number of unemployed aged 16+
* Quarters are numbered by row order in the sheet
import excel "ur_uk.xlsx", sheet("Sheet2") firstrow clear
gen quarter = _n
save temp_uk1, replace

* Number of unemployment benefit recipients
* Months are numbered by row order in the sheet
import excel "ubr_uk.xlsx", sheet("Sheet1") firstrow clear
gen month = _n

* Months 1-3 belong to quarter 1, months 4-6 to quarter 2, and so on.
* The quarter is computed instead of enumerated, so a month beyond 18 gets its
* own quarter rather than silently staying in quarter 1.
gen quarter = ceil(month/3)

* Merge
* Every month receives the unemployment count of its quarter
merge m:1 quarter using temp_uk1, nogenerate

* Calculate percentage of unemployment benefit receipt for each racial-ethnic group
* (benefit recipients divided by the number of unemployed, times 100)
gen uk_white_pct = White / White_un * 100
gen uk_minority_pct = Ethnicminority / EthnicMinority2_un * 100

* Save UK file
* temp_uk1 is overwritten with the monthly percentages
keep month quarter uk_white_pct uk_minority_pct
save temp_uk1, replace

// Netherlands
* Number of unemployed aged 15-75 (in thousands)
* Quarters are numbered by row order in the sheet
import excel "ubr_nl.xlsx", sheet("Sheet1") firstrow clear
gen quarter = _n

* Convert the counts from thousands to persons
foreach v of varlist native_un migrant_un western_migrant_un nonwestern_migrant_un {
    replace `v' = `v' * 1000
}
save temp_nl1, replace

* Number of unemployment benefit recipients
* Months are numbered by row order in the sheet
import excel "ubr_nl.xlsx", sheet("Sheet2") firstrow clear
gen month = _n

* Months 1-3 belong to quarter 1, months 4-6 to quarter 2, and so on.
* The quarter is computed instead of enumerated, so a month beyond 18 gets its
* own quarter rather than silently staying in quarter 1.
gen quarter = ceil(month/3)

* Merge
* Every month receives the unemployment counts of its quarter
merge m:1 quarter using temp_nl1, nogenerate

* Calculate percentage of unemployment benefit receipt for each racial-ethnic group
* (benefit recipients divided by the number of unemployed, times 100)
gen nl_native_pct = native_ubr / native_un * 100
gen nl_western_pct = western_migrant_ubr / western_migrant_un * 100
gen nl_nonwestern_pct = nonwestern_migrant_ubr / nonwestern_migrant_un * 100

* Save NL file
* NOTE(review): time is not created in this section; it presumably comes from
* one of the two imported sheets. Confirm against ubr_nl.xlsx.
keep time month quarter nl_native_pct nl_western_pct nl_nonwestern_pct
save temp_nl1, replace

* Merge UK and NL files
* The data in memory are joined with the monthly UK percentages on month
merge 1:1 month using temp_uk1, nogenerate
save data_4, replace

* Erase temporary files
foreach f in temp_uk1 temp_nl1 {
    erase `f'.dta
}

